# Tyler Moore
# Analysis for TOIT 2018 paper

library(data.table)
library(anytime)
library(survival)

################################################################################
# Data input and pre-processing
################################################################################
# Exchange-level quarterly volumes. Only the first three columns are kept in
# `foo`; they must include the merge keys name/quarter and (presumably) VolBTC,
# which serves as the denominator of the share calculations below.
bar3 <- read.csv("btcvolqtrbe.csv", header = TRUE)
foo <- bar3[, 1:3]

# Volumes broken down by exchange, currency and quarter.
fcur <- read.csv("btcvolqtrcur.csv", header = TRUE)

# Number of fcur rows (presumably one per exchange) for every
# currency/quarter combination.
exct <- aggregate(name ~ currency + quarter, data = fcur, FUN = length)
names(exct) <- c("currency", "quarter", "numcurq")

fcur2 <- merge(fcur, exct, by = c("currency", "quarter"))

# Printed diagnostics: total currency-quarters, then how many of them have
# exactly one and exactly two entries.
length(exct$numcurq)
length(exct$numcurq[exct$numcurq == 1])
length(exct$numcurq[exct$numcurq == 2])

# monoq: the currency had a single entry that quarter; duoq: one or two.
fcur2$monoq <- fcur2$numcurq == 1
fcur2$duoq <- fcur2$numcurq == 1 | fcur2$numcurq == 2

# Fraction of each exchange's quarterly BTC volume that came from
# mono/duopoly currencies.
fcur3 <- aggregate(VolBTC ~ name + quarter + duoq, data = fcur2, FUN = sum)
fcur4 <- fcur3[fcur3$duoq, c("name", "quarter", "VolBTC")]
names(fcur4) <- c("name", "quarter", "DuoBTC")
# all.y keeps exchange-quarters without any duopoly volume; those get 0.
fcur5 <- merge(fcur4, foo, by = c("name", "quarter"), all.y = TRUE)
fcur5$DuoBTC[is.na(fcur5$DuoBTC)] <- 0
fcur5$fracduo <- fcur5$DuoBTC / fcur5$VolBTC

# Same calculation restricted to monopoly currencies only.
fcurm3 <- aggregate(VolBTC ~ name + quarter + monoq, data = fcur2, FUN = sum)
fcurm4 <- fcurm3[fcurm3$monoq, c("name", "quarter", "VolBTC")]
names(fcurm4) <- c("name", "quarter", "MonoBTC")
fcurm5 <- merge(fcurm4, foo, by = c("name", "quarter"), all.y = TRUE)
fcurm5$MonoBTC[is.na(fcurm5$MonoBTC)] <- 0
fcurm5$fracmono <- fcurm5$MonoBTC / fcurm5$VolBTC

# Load the breach records. Column classes are fixed so that the date column
# stays a character string and can be parsed explicitly as YYYYMMDD below.
bdf <- read.csv(
  "breachStats-v2.csv",
  sep = ",",
  header = TRUE,
  colClasses = c(
    "factor", "character", "character", "character", "character",
    "integer", "integer", "numeric", "numeric"
  )
)
bdf$breachDay <- as.Date(bdf$breachDate, format = "%Y%m%d")
#bdf$closed<-ifelse(bdf$isClosed=="True",T,F)

# Keep only exchange name and breach date, and bucket the date by quarter.
bdf2 <- bdf[, c("name", "breachDay")]
bdf2$breachq <- cut(bdf2$breachDay, breaks = "quarter")

# Left join onto the volume panel: exchange-quarters without a breach get
# breachDay = NA.
bar4 <- merge(
  bar3, bdf2,
  by.x = c("name", "quarter"),
  by.y = c("name", "breachq"),
  all.x = TRUE
)

# Exchange-level table holding the closure flag and the last trading day.
fbt <- read.csv("finalBreachTable2.csv", sep = ",", header = TRUE)
fbt$lastday <- anydate(fbt$lastTradeDay)
fbt2 <- fbt[, c("name", "isClosed", "lastday")]

# bar5b additionally keeps exchanges that have no rows in the volume panel;
# bar5 (inner join) is the table used from here on.
bar5b <- merge(bar4, fbt2, by = "name", all.y = TRUE)
bar5 <- merge(bar4, fbt2, by = "name")

# lastday.y is the copy of lastday contributed by fbt2; the suffix implies
# bar4 already carries a lastday column of its own (presumably from bar3).
bar5$endq <- cut(bar5$lastday.y, breaks = "quarter")
bar5$endqc <- as.character(bar5$endq)
bar5$quarterc <- as.character(bar5$quarter)

# An exchange-quarter is a closing quarter when the exchange is closed and the
# row's quarter is the quarter of its last trading day.
bar5$isclosedq <- FALSE
bar5$isclosedq[bar5$isClosed & bar5$quarterc == bar5$endqc] <- TRUE
# A breach happened in this quarter iff the breach join found a date.
bar5$isbreachq <- !is.na(bar5$breachDay)

bar6 <- bar5[, c("name", "quarter", "DailyBTC", "isbreachq", "isclosedq")]

# Infinite volumes and volumes below 1 are both set to 1, so the base-2 log
# taken later is never negative.
bar6$DailyBTC[bar6$DailyBTC == Inf] <- 1
bar6$DailyBTC[bar6$DailyBTC < 1] <- 1
bar6$namef <- factor(bar6$name)

# Two-factor-authentication (2FA) observations per exchange.
twofa <- read.csv(
  "time-series-simple.csv",
  sep = ",", header = TRUE, na.strings = c("NA", "na")
)

# has2fa2: TRUE unless has2fa is "no"; NA where has2fa itself is missing.
twofa$has2fa2 <- TRUE
twofa$has2fa2[twofa$has2fa == "no"] <- FALSE
twofa$has2fa2[is.na(twofa$has2fa)] <- NA

twofa$predate <- anydate(twofa$pre2fa)
twofa$postdate <- anydate(twofa$post2fa)

# always2fa is TRUE when the pre and post dates coincide. The comparison is NA
# when either date is missing; in that case fall back on has2fa2.
# (The fallback previously tested predate twice, so rows missing only postdate
# kept always2fa = NA.)
twofa$always2fa <- ifelse(twofa$postdate == twofa$predate, TRUE, FALSE)
twofa$always2fa[(is.na(twofa$predate) | is.na(twofa$postdate)) & twofa$has2fa2] <- TRUE
twofa$always2fa[(is.na(twofa$predate) | is.na(twofa$postdate)) & !twofa$has2fa2] <- FALSE
twofa$diffdate <- twofa$postdate - twofa$predate
only2fa <- twofa[twofa$has2fa2 & !is.na(twofa$always2fa), ]

# Printed diagnostics: number of exchanges with zero / non-zero gap between the
# two dates, and the median non-zero gap. which() drops rows whose gap is NA
# (a missing date) so they are neither counted in both tallies nor allowed to
# turn the median into NA.
length(which(only2fa$diffdate == 0))
length(which(only2fa$diffdate != 0))
median(only2fa$diffdate[which(only2fa$diffdate != 0)])

# Quarter containing the post-2FA date.
twofa$post2faq <- cut(twofa$postdate, breaks = "quarter")

# Days between each breach and the post-2FA date of the same exchange.
twofa2 <- merge(twofa, bdf2, by = "name")
twofa2$diffbd <- twofa2$breachDay - twofa2$postdate

# Slim 2FA table that gets joined onto the exchange-quarter panels.
twofab <- twofa[, c("name", "has2fa2", "predate", "postdate", "post2faq", "always2fa")]

# Panel + 2FA. has2fa starts from always2fa and is switched on for every
# quarter at or after the post-2FA quarter of exchanges that did not always
# have it.
bar7 <- merge(bar6, twofab, by = "name")
bar7$has2fa <- bar7$always2fa
bar7$has2fa[!bar7$always2fa & anydate(bar7$quarter) >= anydate(bar7$post2faq)] <- TRUE

# Panel + currency concentration; an exchange-quarter is "mostly" duo/mono when
# at least 90% of its volume comes from such currencies.
bar8 <- merge(bar6, fcur5, by = c("name", "quarter"))
bar8$mostduo <- bar8$fracduo >= 0.9
bar8m <- merge(bar6, fcurm5, by = c("name", "quarter"))
bar8m$mostmono <- bar8m$fracmono >= 0.9

# Concentration panels + 2FA, with the same time-varying has2fa rule as bar7.
bar9 <- merge(bar8, twofab, by = "name")
bar9$has2fa <- bar9$always2fa
bar9$has2fa[!bar9$always2fa & anydate(bar9$quarter) >= anydate(bar9$post2faq)] <- TRUE

bar9m <- merge(bar8m, twofab, by = "name")
bar9m$has2fa <- bar9m$always2fa
bar9m$has2fa[!bar9m$always2fa & anydate(bar9m$quarter) >= anydate(bar9m$post2faq)] <- TRUE

# Outlier handling for World Bitcoin Exchange, bitme and Global Bitcoin
# Exchange. For each exchange the rows are printed for inspection, the row just
# before the ones to be dropped is flagged as the closing quarter, and the
# trailing rows are then removed by position.
# NOTE(review): the row numbers (160/161, 337/338, 546-548) are hard-coded and
# only valid for the exact row order merge() produced on the original input
# files -- confirm them before re-running on changed data.
# NOTE(review): bitcoin-24.com is not touched in this block; it is handled
# separately where bar13 is built.
bar10 <- bar9
# Print the World Bitcoin Exchange rows; presumably these are rows 546-548.
# Rows 547 and 548 are dropped below, so row 546 is marked as the closing quarter.
bar10[bar10$name=="World Bitcoin Exchange",]
bar10[546,c("isclosedq")]<-T
# Print the bitme rows; row 161 is dropped below, so row 160 is marked closed.
bar10[bar10$name=="bitme",]  #remove 161
bar10[160,c("isclosedq")]<-T

# Print the Global Bitcoin Exchange rows; row 338 is dropped below, so row 337
# is marked closed.
bar10[bar10$name=="Global Bitcoin Exchange",]  #remove 338
bar10[337,c("isclosedq")]<-T
# Drop the trailing rows identified above in a single step, so the indices
# still refer to the original (pre-removal) row positions.
bar10<-bar10[-c(161,338,547,548),]

# Add the linear time trend. timetrend.csv is joined on quarter and presumably
# supplies the qtime column used below.
tt <- fread("timetrend.csv")
bar11 <- merge(tt, bar10, by = "quarter")
bar11$lgDailyBTC <- log(bar11$DailyBTC, base = 2)

# Exchange-level attributes (country code, AML, ToS, security practices).
survex15 <- read.table("finalBreachTable2.csv", sep = ",", header = TRUE)
survex15s <- survex15[, c("name", "cc", "aml", "ToS", "bugBounty", "secAudit", "coldStorage")]
bar12 <- merge(bar11, survex15s, by = "name")

# Treat Bitcoin-24.com as two separate exchanges: rows with qtime >= 16 are
# renamed to "Bitcoin-24.com V2", and the first incarnation is marked as closed
# in the quarter with qtime == 12.
bar13 <- bar12
bar13$namec <- as.character(bar13$name)
bar13$namec[bar13$namec == "Bitcoin-24.com" & bar13$qtime >= 16] <- "Bitcoin-24.com V2"
bar13$name <- factor(bar13$namec)
bar13$isclosedq[bar13$name == "Bitcoin-24.com" & bar13$qtime == 12] <- TRUE

library(pglm)
library(Hmisc)

################################################################################
# Analysis
################################################################################

# Robustness check: refit the models on the panel that still contains the
# outlier rows (built from bar9 instead of bar10).
out11 <- merge(tt, bar9, by = "quarter")
out11$lgDailyBTC <- log(out11$DailyBTC, base = 2)
out12 <- merge(out11, survex15s, by = "name")

# Pooled logit of quarterly closure on breach, log volume and time trend ...
outbaseline <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime,
  data = out12, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)
# ... plus, one at a time: currency concentration, AML and 2FA.
outplusduo <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime + mostduo,
  data = out12, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)
outplusaml <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime + aml,
  data = out12, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)
outplus2fa <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime + has2fa,
  data = out12, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)
summary(outbaseline)
summary(outplusduo)
summary(outplusaml)
summary(outplus2fa)

# Same duopoly and 2FA specifications on bar12, i.e. with the outlier rows
# removed but before Bitcoin-24.com is split in two.
summary(pglm(
  isclosedq ~ lgDailyBTC + isbreachq + mostduo + qtime,
  data = bar12, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
))
summary(pglm(
  isclosedq ~ lgDailyBTC + isbreachq + has2fa + qtime,
  data = bar12, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
))

# Spearman correlation among the security variables, computed on the distinct
# combinations of exchange name and security attributes found in bar13.
secvars4c <- unique(
  bar13[, c("name", "has2fa2", "bugBounty", "secAudit", "coldStorage")]
)

corsec <- rcorr(
  as.matrix(secvars4c[, c("has2fa2", "bugBounty", "secAudit", "coldStorage")]),
  type = "spearman"
)

##### Table VI
# Pooled logit models of quarterly closure on the final panel (bar13): a
# baseline, then one extra regressor at a time.
baseline <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime,
  data = bar13, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)
plusduo <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime + mostduo,
  data = bar13, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)
plusaml <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime + aml,
  data = bar13, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)
plus2fa <- pglm(
  isclosedq ~ isbreachq + lgDailyBTC + qtime + has2fa,
  data = bar13, family = binomial("logit"), model = "pooling",
  method = "bfgs", print.level = 3, R = 5, index = "name"
)

# All generated tables and figures are written below output/.
dir.create(file.path("output"), showWarnings = FALSE)

# Coefficients next to their exponentials, two columns per model. The baseline
# has one term fewer than the other models, so it is padded with NA to line up.
modelcoef <- cbind(
  c(coef(baseline), NA), c(exp(coef(baseline)), NA),
  coef(plusduo), exp(coef(plusduo)),
  coef(plusaml), exp(coef(plusaml)),
  coef(plus2fa), exp(coef(plus2fa))
)
write.table(
  format(modelcoef, digits = 1),
  file = "output/modelcoef.tex.raw", sep = "& &", quote = FALSE
)
summary(baseline)
summary(plusduo)
summary(plusaml)
summary(plus2fa)
# Number of observations used by the baseline and the 2FA model.
length(baseline$model$qtime)
length(plus2fa$model$qtime)

# Element 6 of each summary is presumably the coefficient table and its 4th
# column the p-values -- TODO confirm against the pglm/maxLik summary layout.
pvalues <- c(
  summary(baseline)[[6]][, 4],
  summary(plusduo)[[6]][, 4],
  summary(plusaml)[[6]][, 4],
  summary(plus2fa)[[6]][, 4]
)
names(pvalues) <- c(
  "baseline (Intercept)", "baseline BreachQ", "baseline lgDailyBTC", "baseline qtime",
  "duo (Intercept)", "duo BreachQ", "duo lgDailyBTC", "duo qtime", "duo mostduo",
  "aml (Intercept)", "aml BreachQ", "aml lgDailyBTC", "aml qtime", "aml aml",
  "2fa (Intercept)", "2fa BreachQ", "2fa lgDailyBTC", "2fa qtime", "2fa has2fa"
)



#summary(pglm(isclosedq ~ lgDailyBTC + isbreachq + bugBounty + qtime,data = bar12,family = binomial('logit'), model = "pooling", method = "bfgs", print.level = 3, R = 5,index="name"))
#summary(pglm(isclosedq ~ lgDailyBTC + isbreachq + secAudit + qtime,data = bar12,family = binomial('logit'), model = "pooling", method = "bfgs", print.level = 3, R = 5,index="name"))

# Spearman correlation among the response and all predictors of the models.
corall <- rcorr(
  as.matrix(
    bar13[, c("isclosedq", "isbreachq", "lgDailyBTC", "mostduo", "has2fa", "aml", "qtime")]
  ),
  type = "spearman"
)

##### Tables III and V
# Write both correlation matrices as raw LaTeX table rows.
write.table(
  format(corall$r, digits = 1),
  file = "output/predcor.tex.raw", sep = "& &", quote = FALSE
)
write.table(
  format(corsec$r, digits = 1),
  file = "output/securitycor.tex.raw", sep = "& &", quote = FALSE
)

##### Figure 4
# Survival curve of exchange lifetime, right-censored, with closure as the
# event; written to output/survepoch.pdf.
su <- Surv(time = survex15$lifetime, event = survex15$isClosed, type = "right")
pdf("output/survepoch.pdf", width = 6, height = 5)
par(mar = c(4.1, 4.1, 0.5, 0.5))
plot(
  survfit(su ~ 1),
  lwd = 2,
  xlab = "Days",
  ylab = "Survival probability",
  cex.lab = 1.3,
  cex.axis = 1.3
)
legend(
  "topright",
  legend = c("Overall", "95% CI"),
  col = c(1, 1), lwd = 2, lty = 1:2
)
dev.off()

# Empirical CDFs of the duopoly volume share and of daily BTC volume.
fre <- ecdf(bar13$fracduo)
vole <- ecdf(bar13$DailyBTC)

##### Figure 5
# Two side-by-side step plots drawn from the x/y values stored inside each
# ecdf closure, with a (0, 0) starting point prepended. The volume panel uses
# a logarithmic x axis.
pdf("output/cdfduo.pdf", width = 12, height = 5)
par(mfrow = c(1, 2), mar = c(4.1, 4.1, 0.5, 0.5))
plot(
  x = c(0, environment(vole)$x),
  y = c(0, environment(vole)$y),
  log = "x", type = "s", lwd = 2,
  xlab = "Daily BTC Volume Per Quarter Per Exchange",
  ylab = "Probability Volume <= X"
)
plot(
  x = c(0, environment(fre)$x),
  y = c(0, environment(fre)$y),
  type = "s", lwd = 2,
  xlab = "Fraction Quarterly BTC Volume from Mono/Duopoly Currency",
  ylab = "Probability Fraction Volume <= X"
)
dev.off()

library(RColorBrewer)

##### Figure 2
# Stacked bar chart of the number of exchange rows per quarter, split by
# whether the exchange closed in that quarter; written to output/barct.pdf.
pdf("output/barct.pdf", width = 9, height = 5)
par(mar = c(6, 4, 0.5, 0.5))
# brewer.pal() supports a minimum of 3 colours: asking for 2 raises a warning
# on every call and returns 3 anyway. Request 3 once and keep the first two,
# which are the colours that were actually drawn before.
barct_cols <- brewer.pal(3, "Dark2")[1:2]
barplot(
  table(bar13$isclosedq, bar13$quarter),
  las = 2, ylab = "# Exchanges",
  col = barct_cols, border = barct_cols,
  angle = c(60, 150), density = 15
)
legend(
  "topleft",
  legend = c("Exchanges Open During Quarter", "Exchanges Closed During Quarter"),
  col = barct_cols, fill = barct_cols, border = barct_cols,
  text.col = barct_cols,
  angle = c(60, 150), density = 15
)
dev.off()

# Per-quarter shares of exchange rows.
# prop.table(..., 2) normalises each quarter (column) to sum to 1; row 2 is the
# TRUE level (assuming both FALSE and TRUE occur in the flag).

# Breach and closure shares are multiplied by 4 to annualise the quarterly rate.
frbr <- 4 * prop.table(table(bar13$isbreachq, bar13$quarter), margin = 2)[2, ]
frcl <- 4 * prop.table(table(bar13$isclosedq, bar13$quarter), margin = 2)[2, ]

# Share of exchange rows that are mostly mono/duopoly-currency in each quarter.
frduo <- prop.table(table(bar13$mostduo, bar13$quarter), margin = 2)[2, ]

# NOTE(review): despite its name, frmono is computed from isbreachq, so it
# equals frbr / 4 rather than a monopoly-currency share. bar13 does not appear
# to carry a mostmono column (that is built on bar8m/bar9m), and frmono is not
# used in the visible code -- confirm the intent before relying on it.
frmono <- prop.table(table(bar13$isbreachq, bar13$quarter), margin = 2)[2, ]

library(zoo)

##### Figure 3
# Annualised closure and breach probabilities per quarter, smoothed with a
# four-quarter trailing average; written to output/exprob.pdf.
pdf("output/exprob.pdf", width = 8, height = 6)
par(mfrow = c(2, 1), mar = c(2, 5, 0.5, 0.5))
# rollmean(x, 4, align = "right") returns one value for every quarter from the
# 4th onward, so the x positions are the quarter names with the first three
# dropped. This replaces a hard-coded 4:19 that only worked for exactly 19
# quarters. Each point is placed 45 days after the start of its quarter.
plot(
  x = anydate(names(frcl)[-seq_len(3)]) + 45,
  y = rollmean(frcl, 4, align = "right"),
  type = "l",
  ylab = "exchange closure probability\n (annualized, rolling avg.)",
  xlab = "", ylim = c(0, .5), lwd = 2, col = 2, lty = 2
)
plot(
  x = anydate(names(frbr)[-seq_len(3)]) + 45,
  y = rollmean(frbr, 4, align = "right"),
  type = "l",
  ylab = "exchange breach probability\n (annualized, rolling avg.)",
  xlab = "", ylim = c(0, .5), lwd = 2, col = 1
)

dev.off()



